# 
# generateDataExp <- function(N, alpha, beta){
#   #set.seed(1)
#   X <- rexp(n = N, rate = 2)
#   # binary outcome
#   pi <- exp(alpha + beta*X)/(exp(alpha + beta*X) + 1) 
#   Y2 <- rbinom(N, size = 1, prob = pi)
#   #print(pi)
#   return(data.frame(Y2, X))
# }
# 
# 
# coef <- 'X'
# P <- 100
# n <- 2*10^5
# nsims <- 250
# epsilons <- c(0.1, 0.25, 0.5, 0.75, 1)
# 
# # Run simulations 
# 
# sims <- lapply(epsilons, function(i) {
#   return(lapply(1:nsims, function(j) {
#     dat <- generateDataExp(N = n, alpha = 0.25, beta = 0.25)
#     sim <- algorithmUDP(
#       data = dat, statistic = logitCoefFn, B = 5, n = n / P,
#       P = P, lambda = 0.3, lambda_var = 1, delta = 1 / n,
#       epsilon = i, epsilon_alpha = 1, parallelize = TRUE, censoring_cutoff = 1,
#       bias_cutoff = 0, disjoint = T, form = as.formula(Y2 ~ X), coef = coef
#     )
#     return(sim)
#   }))
#   }
# )
# 
# 
# save(sims, file = 'DGP II/sims_new_DGP.Rdata')


# Analyze 

load('DGP II/sims_new_DGP.Rdata')

epsilons <- c(0.1, 0.25, 0.5, 0.75, 1)

sims_df <- do.call(rbind, lapply(1:length(sims), function(j){
  do.call(rbind, lapply(1:length(sims[[j]]), function(i) {
    return(c(tilde = sims[[j]][[i]]$theta_tilde, hat = sims[[j]][[i]]$theta_hat, 
             var = sims[[j]][[i]]$var_est, epsilon = sims[[j]][[i]]$epsilon, 
             no_noise = sims[[j]][[i]]$theta_blb, 
             censored  = sims[[j]][[i]]$a_2))
  }))
})) %>% as.data.frame()


sims_summary <- sims_df %>% 
  subset(epsilon > 0.1) %>%
  group_by(epsilon) %>%
  summarize(theta_tilde = mean(tilde), 
            theta_hat = mean(hat), 
            true_sd = sqrt(var(tilde)), 
            est_sd = sqrt(mean(var)), 
            mean(censored))


bias_plot <- ggplot(sims_summary) + geom_line(aes(x = epsilon, y = theta_tilde - 0.25), col = 'navyblue') + 
  geom_line(aes(x = epsilon, y = theta_hat - 0.25), col = 'orange3') + 
  geom_point(aes(x = epsilon, y = theta_tilde - 0.25), col = 'navyblue') + 
  geom_point(aes(x = epsilon, y = theta_hat - 0.25), col = 'orange3') +
  geom_hline(aes(yintercept = 0), linetype = 'dashed') + 
  annotate(geom = 'text', x = 0.8, y = 0.01, label = TeX('$\\tilde{\\theta}$'), col = 'navyblue') +
  annotate(geom = 'text', x = 0.4, y = -0.03, label = TeX('$\\hat{\\theta}$'), col = 'orange3') +
  theme_bw() + ylim(c(-0.05, 0.05)) + scale_x_continuous(breaks = epsilons[-1]) + 
  labs(x = expression(epsilon), y = 'Bias')
  

std_error_plot <- ggplot(sims_summary) + geom_line(aes(x = epsilon, y = est_sd), col = 'navyblue') + 
  geom_line(aes(x = epsilon, y = true_sd), col = 'orange3') + 
  geom_point(aes(x = epsilon, y = est_sd), col = 'navyblue') + 
  geom_point(aes(x = epsilon, y = true_sd), col = 'orange3') +
  theme_bw() + scale_x_continuous(breaks = epsilons[-1]) + 
  annotate(geom = 'text', x = 0.9, y = 0.022, label = TeX('$\\hat{sd}(\\theta)$'), col = 'navyblue') +
  annotate(geom = 'text', x = 0.5, y = 0.021, label = TeX('$sd(\\theta)$'), col = 'orange3')+
  labs(x = expression(epsilon), y = 'Std. Error')

ggsave('plots/figA2a.pdf', bias_plot, height = 4, width = 5)
ggsave('plots/figA2b.pdf', std_error_plot, height = 4, width = 5)
  